import codecs
import json
import os

def load_data(f1):
    """Read a question-answering JSON document from an open file object.

    The document is expected to have a top-level "data" list (the layout
    looks SQuAD-style, judging by the key names). For every entry only the
    first paragraph is read, and for every question only the first answer.

    Args:
        f1: readable file-like object containing the JSON document.

    Returns:
        A tuple ``(context, answer, question, answer_start)``:
        ``context`` holds one context string per entry; the other three are
        lists of per-entry lists, aligned with ``context`` and with each
        other, holding the answer texts, the question strings and the
        answer start offsets.
    """
    payload = json.load(f1)

    context, answer, question, answer_start = [], [], [], []
    for entry in payload["data"]:
        # Only the first paragraph of each entry is consumed.
        first_paragraph = entry["paragraphs"][0]
        context.append(first_paragraph["context"])

        texts, questions, starts = [], [], []
        for qa in first_paragraph["qas"]:
            # Only the first listed answer of each question is kept.
            first_answer = qa["answers"][0]
            start = first_answer["answer_start"]
            texts.append(first_answer["text"])
            questions.append(qa["question"])
            starts.append(start)

        answer.append(texts)
        question.append(questions)
        answer_start.append(starts)

    return context, answer, question, answer_start

def generate_repositioning(context, answer, question, answer_start, q):
    """Write the merged question-answering data to ``result/<q>_repositioning_merge.json``.

    The four list arguments are parallel: index ``i`` of ``answer``,
    ``question`` and ``answer_start`` holds the per-question lists that
    belong to ``context[i]``. Every question gets a fresh sequential id
    (as a string) counted across the whole document.

    Args:
        context: list of context strings, one per output entry.
        answer: list of lists of answer texts.
        question: list of lists of question strings.
        answer_start: list of lists of answer start offsets.
        q: value used as the output file name prefix (converted with ``str``).

    Raises:
        IndexError: if the parallel lists are shorter than ``context`` or
            than the corresponding question list.
        OSError: if the output file cannot be opened (for example when the
            ``result`` directory does not exist).
    """
    out_path = os.path.join("result", str(q) + "_repositioning_merge.json")

    # Build the whole document before touching the file, so malformed input
    # cannot leave a truncated output file behind.
    entries = []
    next_id = 0
    for i, context_part in enumerate(context):
        questions = question[i]
        answers = answer[i]
        starts = answer_start[i]

        qas = []
        for j, ques in enumerate(questions):
            qas.append({
                "answers": [{"answer_start": starts[j], "text": answers[j]}],
                "question": ques,
                "id": str(next_id),
            })
            next_id += 1

        entries.append({
            "title": " ",
            "paragraphs": [{"context": context_part, "qas": qas}],
        })

    payload = {"data": entries, "version": "1.1"}

    # The context manager closes the file even if serialisation fails.
    with codecs.open(out_path, "w", encoding="utf-8") as out_file:
        json.dump(payload, out_file)




if __name__ == "__main__":
    # Incrementally merge the per-round files in "result":
    # round 1 merges 0_repositioning.json with 1_repositioning.json, and each
    # later round q merges the previous merge output with q_repositioning.json.
    # Every round writes <q>_repositioning_merge.json.
    path = "result"
    for q in range(1, 20):
        if q == 1:
            add_merge = "0_repositioning.json"
        else:
            add_merge = str(q - 1) + "_repositioning_merge.json"
        pre_file = os.path.join(path, add_merge)
        add = str(q) + "_repositioning.json"
        now_file = os.path.join(path, add)

        # Read each input inside a context manager so the handle is released
        # even when parsing fails.
        with codecs.open(pre_file, "r", encoding="utf-8") as f1:
            context1, answer1, question1, answer_start1 = load_data(f1)
        with codecs.open(now_file, "r", encoding="utf-8") as f2:
            context2, answer2, question2, answer_start2 = load_data(f2)

        for context2_part, questions2, answers2, starts2 in zip(
                context2, question2, answer2, answer_start2):
            found = False
            # No early exit: every stored entry with an identical context
            # receives the questions it does not have yet.
            for j, known_context in enumerate(context1):
                if context2_part != known_context:
                    continue
                found = True
                for ques, ans, start in zip(questions2, answers2, starts2):
                    if ques not in question1[j]:
                        question1[j].append(ques)
                        answer1[j].append(ans)
                        answer_start1[j].append(start)
            if not found:
                # Unseen context: carry the whole entry over unchanged.
                context1.append(context2_part)
                answer1.append(answers2)
                question1.append(questions2)
                answer_start1.append(starts2)

        generate_repositioning(context1, answer1, question1, answer_start1, q)
